package com.nku.mason.lucene;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.nku.mason.lucene.util.*;

/**
 * Prints the tokens produced by Lucene's built-in analyzers and by
 * third-party analyzers, so their tokenization results can be compared.
 * 
 * @author Mason
 * 
 */
public class TestAnalyzer {
	/**
	 * Tokenizes {@code text} with {@code analyzer} and prints the analyzer's
	 * class followed by each produced term, one per line, to standard output.
	 * 
	 * @param analyzer the analyzer whose tokenization is displayed
	 * @param text     the text to tokenize
	 * @throws Exception if the token stream fails while being reset, consumed,
	 *                   ended or closed
	 */
	private static void testAnalyzer(Analyzer analyzer, String text) throws Exception {
		System.out.println("当前使用的分词器：" + analyzer.getClass());
		TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
		try {
			// addAttribute returns the attribute instance registered on the stream,
			// so it is fetched once here instead of being looked up for every token.
			TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
			// Follow the consumer workflow: reset -> incrementToken* -> end -> close.
			tokenStream.reset();
			while (tokenStream.incrementToken()) {
				System.out.println(termAttribute.term());
			}
			tokenStream.end();
		} finally {
			// Always release the stream (and its underlying reader), even on failure.
			tokenStream.close();
		}
	}

	/**
	 * Runs the tokenization demo on a fixed sample sentence.
	 * 
	 * @param args command-line arguments (unused)
	 * @throws Exception if tokenization fails
	 */
	public static void main(String[] args) throws Exception {
		// Lucene built-in analyzers; uncomment one to compare its output with IKAnalyzer.
//		testAnalyzer(new StandardAnalyzer(LuceneUtil.getVersion()), "南开大学it电光学院");
//		testAnalyzer(new ArabicAnalyzer(LuceneUtil.getVersion()), "南开大学it电光学院");
		testAnalyzer(new IKAnalyzer(), "南开大学it电光学院");
	}
}
